import torch
import torch.nn as nn
import torch.nn.functional as F
from utils.masking import TriangularCausalMask, ProbMask
from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
from layers.SelfAttention_Family import FullAttention, ProbAttention, AttentionLayer
from layers.Embed import DataEmbedding,DataEmbedding_only_timemodel,DataEmbedding_timemodel,DataEmbedding_wo_pos,DataEmbedding_wo_temp,DataEmbedding_wo_pos_temp
import numpy as np
import torch.fft


class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series.

    The series is padded by replicating its first and last time steps before
    average pooling. The total padding is always ``kernel_size - 1``, so with
    ``stride=1`` the output has the same number of time steps as the input
    for both odd and even ``kernel_size``.

    Args:
        kernel_size: Width of the averaging window, in time steps.
        stride: Stride passed to the underlying ``nn.AvgPool1d``.
    """
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """
        Average ``x`` over a sliding window along dim 1.

        Args:
            x: 3-D tensor; dim 1 is the axis that gets padded and pooled.

        Returns:
            Tensor with the same dim 0 and dim 2 as ``x``; dim 1 has the same
            length as the input when ``stride == 1``.
        """
        # Split the kernel_size - 1 padding steps between both ends. For an
        # even kernel the extra step goes to the tail; padding both ends with
        # (kernel_size - 1) // 2 would leave the output one step short.
        total_pad = self.kernel_size - 1
        front_pad = total_pad // 2
        end_pad = total_pad - front_pad

        # Replicate the edge values instead of zero padding so the average is
        # not dragged towards zero at the boundaries.
        front = x[:, 0:1, :].repeat(1, front_pad, 1)
        end = x[:, -1:, :].repeat(1, end_pad, 1)
        x = torch.cat([front, x, end], dim=1)

        # AvgPool1d pools over the last dim, so move dim 1 there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block.

    Splits an input into its moving average and the remainder left after
    subtracting that moving average.

    Args:
        kernel_size: Window width handed to the ``moving_avg`` sub-module.
    """
    def __init__(self, kernel_size):
        super().__init__()
        # The smoother is always built with stride 1.
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """
        Decompose ``x``.

        Returns:
            Tuple ``(res, moving_mean)`` where ``moving_mean`` is the output of
            the moving-average sub-module and ``res = x - moving_mean``.
        """
        trend = self.moving_avg(x)
        return x - trend, trend


class FeedForwardNetwork(nn.Module):
    """
    Two-layer perceptron: ``Linear(hidden_size, hidden_size)`` -> sigmoid ->
    ``Linear(hidden_size, filter_size)``.

    Both linear layers get Xavier-uniform weights and zero biases.

    Args:
        hidden_size: Size of the input features and of the hidden layer.
        filter_size: Size of the output features.
        dropout_rate: Probability for the ``dropout`` sub-module. That module
            is constructed but ``forward`` does not apply it.
    """
    def __init__(self, hidden_size, filter_size, dropout_rate=0.1):
        super().__init__()

        self.layer1 = nn.Linear(hidden_size, hidden_size)
        # NOTE: the attribute is named ``relu`` but the activation is a sigmoid.
        self.relu = nn.Sigmoid()
        self.dropout = nn.Dropout(dropout_rate)
        self.layer2 = nn.Linear(hidden_size, filter_size)

        # Re-initialise in layer order (layer1 first, then layer2).
        for linear in (self.layer1, self.layer2):
            self.initialize_weight(linear)

    def initialize_weight(self, x):
        """Xavier-uniform init for ``x.weight``; zero ``x.bias`` if present."""
        nn.init.xavier_uniform_(x.weight)
        if x.bias is None:
            return
        nn.init.constant_(x.bias, 0)

    def forward(self, x):
        """Apply layer1, the sigmoid activation, then layer2 (no dropout)."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)
            

class Model(nn.Module):
    """
    Feed-forward forecaster built from three concatenated feature groups.

    For every input channel the model concatenates
      * a learnable per-channel vector (``nodevec``, ``t_model`` features),
      * the flattened output of ``enc_embedding`` applied to the time marks
        (``pred_len * d_model`` features), and
      * the input history with its last time step subtracted
        (``seq_len`` features),
    and maps the result to ``pred_len`` values with one ``FeedForwardNetwork``.
    The subtracted last value is added back to the output.

    The model no longer pins itself to a fixed GPU: all parameters are created
    on the default device, and callers place the model with
    ``model.to(device)`` like any other ``nn.Module``.

    Args:
        configs: Object whose attributes are read in ``__init__`` (``seq_len``,
            ``pred_len``, ``enc_in``, ``c_out``, ``d_model``, ``n_model``,
            ``t_model``, ``conv_kernel``, ``sconv_kernel``, ``period``,
            ``output_attention``, ``d_layers``, ``top_k``, ``embed_type``,
            ``embed``, ``freq``, ``dropout`` and, except for
            ``embed_type == 1``, ``dec_in``).

    Raises:
        ValueError: If ``configs.embed_type`` is not one of 0, 1, 2, 3, 4.
    """
    def __init__(self, configs):
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.enc_in = configs.enc_in
        self.c_out = configs.c_out
        self.d_model = configs.d_model
        self.n_model = configs.n_model
        self.t_model = configs.t_model
        self.conv_kernel = configs.conv_kernel   # [(1,8), (3,16), (4,32), (6,64)]
        self.sconv_kernel = configs.sconv_kernel   # [4, 8, 16, 32]
        self.sample_kernel = [14, 14, 14, 14]
        self.period = configs.period   # [(24,862), (24,862), (24,862), (24,862)]
        self.output_attention = configs.output_attention
        self.d_layers = configs.d_layers
        self.dropout = 0.05
        self.k = configs.top_k

        # Embedding
        # NOTE(review): forward() calls enc_embedding with a single argument
        # and reshapes its output to pred_len * d_model features; confirm
        # which embed types besides 1 support that call.
        if configs.embed_type == 0:
            self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
        elif configs.embed_type == 1:
            # Only an encoder-side embedding is created for this type.
            self.enc_embedding = DataEmbedding_only_timemodel(configs.enc_in, configs.t_model, configs.d_model,
                                                              configs.embed, configs.freq, configs.dropout)
        elif configs.embed_type == 2:
            self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                                      configs.dropout)
            self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                                      configs.dropout)
        elif configs.embed_type == 3:
            self.enc_embedding = DataEmbedding_wo_temp(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                                       configs.dropout)
            self.dec_embedding = DataEmbedding_wo_temp(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                                       configs.dropout)
        elif configs.embed_type == 4:
            self.enc_embedding = DataEmbedding_wo_pos_temp(configs.enc_in, configs.d_model, configs.embed,
                                                           configs.freq, configs.dropout)
            self.dec_embedding = DataEmbedding_wo_pos_temp(configs.dec_in, configs.d_model, configs.embed,
                                                           configs.freq, configs.dropout)
        else:
            # Fail at construction instead of with a missing-attribute error
            # on the first forward pass.
            raise ValueError(f"Unsupported embed_type: {configs.embed_type!r}")

        # Input width = per-channel vector + history + flattened time embedding.
        self.tLinear = FeedForwardNetwork((self.t_model + self.seq_len + self.pred_len * self.d_model), self.pred_len)
        # Learnable per-channel vector, shape (enc_in, t_model). It is a
        # registered parameter, so ``model.to(device)`` moves it together with
        # the rest of the model; no device is hard-coded here.
        self.nodevec = nn.Parameter(torch.randn(self.enc_in, self.t_model), requires_grad=True)

    @property
    def device(self):
        """Device that currently holds the model's ``nodevec`` parameter."""
        return self.nodevec.device

    def forward(self, x, x_mark_dec, y, y_mark_enc, x_fa):
        """
        Forecast ``pred_len`` steps for every channel of ``x``.

        Args:
            x: Input history, shape (batch, seq_len, enc_in).
            x_mark_dec: Unused.
            y: Unused.
            y_mark_enc: Time marks passed to ``enc_embedding``; the embedding
                output must hold ``pred_len * d_model`` values per sample.
            x_fa: Unused.

        Returns:
            Tensor of shape (batch, pred_len, enc_in).
        """
        batch_size = x.shape[0]
        t_emb = self.enc_embedding(y_mark_enc)

        # Remove the last observed value so the network predicts offsets from
        # it; detach keeps gradients from flowing through the offset.
        seq_last = x[:, -1:, :].detach()
        x = x - seq_last

        # Broadcast the per-channel vector over the batch and the flattened
        # time embedding over the channels. expand() creates views; the only
        # copy happens in torch.cat below.
        node_emb = self.nodevec.unsqueeze(dim=0).expand(batch_size, -1, -1)
        # An explicit batch size makes a wrongly sized embedding fail here
        # rather than being folded into a different number of rows.
        time_emb = t_emb.reshape(batch_size, self.pred_len * self.d_model)
        time_emb = time_emb.unsqueeze(dim=1).expand(-1, self.enc_in, -1)

        # [B, enc_in, t_model + pred_len * d_model + seq_len]
        st_emb = torch.cat((node_emb, time_emb, x.permute(0, 2, 1)), dim=-1)
        t_out = self.tLinear(st_emb).permute(0, 2, 1)   # [B, pred_len, enc_in]

        # Add the removed level back.
        dec = seq_last + t_out

        return dec